Combined the factors which influenced by the epidemic and the various characteristics of each London boroughs, this study not only attempts to provide insight on the impact on house prices caused by the pandemic of the different London boroughs, but also shows the individuals’ changed demand for the house.

1.Library Packages

Firstly, Let’s library some packages which we will use in this work

library(tidyverse)
## ─ Attaching packages ──────────────────── tidyverse 1.3.0 ─
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0
## ─ Conflicts ───────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(RColorBrewer)
library(classInt)
library(sp)
library(rgeos)
## rgeos version: 0.5-5, (SVN revision 640)
##  GEOS runtime version: 3.8.1-CAPI-1.13.3 
##  Linking to sp version: 1.4-2 
##  Polygon checking: TRUE
library(tmap)
library(tmaptools)
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.1.4, PROJ 6.3.1
library(rgdal)
## rgdal: version: 1.5-18, (SVN revision 1082)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 3.1.4, released 2020/10/20
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE 
## Loaded PROJ runtime: Rel. 6.3.1, February 10th, 2020, [PJ_VERSION: 631]
## Path to PROJ shared files: /Library/Frameworks/R.framework/Versions/4.0/Resources/library/rgdal/proj
## Linking to sp version:1.4-4
## To mute warnings of possible GDAL/OSR exportToProj4() degradation,
## use options("rgdal_show_exportToProj4_warnings"="none") before loading rgdal.
library(geojsonio)
## 
## Attaching package: 'geojsonio'
## The following object is masked from 'package:base':
## 
##     pretty
library(tmaptools)
library(ggplot2)
library(corrplot)
## corrplot 0.84 loaded
library(haven)
library(texreg)
## Version:  1.37.5
## Date:     2020-06-17
## Author:   Philip Leifeld (University of Essex)
## 
## Consider submitting praise using the praise or praise_interactive functions.
## Please cite the JSS article in your publications -- see citation("texreg").
## 
## Attaching package: 'texreg'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(dplyr) 
library(tidyr)
library(broom)
library(MASS)
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following object is masked from 'package:purrr':
## 
##     some
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(spdep)
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
library(sf)
library(sp)
library(lwgeom)
## Linking to liblwgeom 3.0.0beta1 r16016, GEOS 3.8.1, PROJ 6.3.1
library(spData)
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## Registered S3 methods overwritten by 'spatialreg':
##   method                   from 
##   residuals.stsls          spdep
##   deviance.stsls           spdep
##   coef.stsls               spdep
##   print.stsls              spdep
##   summary.stsls            spdep
##   print.summary.stsls      spdep
##   residuals.gmsar          spdep
##   deviance.gmsar           spdep
##   coef.gmsar               spdep
##   fitted.gmsar             spdep
##   print.gmsar              spdep
##   summary.gmsar            spdep
##   print.summary.gmsar      spdep
##   print.lagmess            spdep
##   summary.lagmess          spdep
##   print.summary.lagmess    spdep
##   residuals.lagmess        spdep
##   deviance.lagmess         spdep
##   coef.lagmess             spdep
##   fitted.lagmess           spdep
##   logLik.lagmess           spdep
##   fitted.SFResult          spdep
##   print.SFResult           spdep
##   fitted.ME_res            spdep
##   print.ME_res             spdep
##   print.lagImpact          spdep
##   plot.lagImpact           spdep
##   summary.lagImpact        spdep
##   HPDinterval.lagImpact    spdep
##   print.summary.lagImpact  spdep
##   print.sarlm              spdep
##   summary.sarlm            spdep
##   residuals.sarlm          spdep
##   deviance.sarlm           spdep
##   coef.sarlm               spdep
##   vcov.sarlm               spdep
##   fitted.sarlm             spdep
##   logLik.sarlm             spdep
##   anova.sarlm              spdep
##   predict.sarlm            spdep
##   print.summary.sarlm      spdep
##   print.sarlm.pred         spdep
##   as.data.frame.sarlm.pred spdep
##   residuals.spautolm       spdep
##   deviance.spautolm        spdep
##   coef.spautolm            spdep
##   fitted.spautolm          spdep
##   print.spautolm           spdep
##   summary.spautolm         spdep
##   logLik.spautolm          spdep
##   print.summary.spautolm   spdep
##   print.WXImpact           spdep
##   summary.WXImpact         spdep
##   print.summary.WXImpact   spdep
##   predict.SLX              spdep
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     anova.sarlm, as_dgRMatrix_listw, as_dsCMatrix_I, as_dsCMatrix_IrW,
##     as_dsTMatrix_listw, as.spam.listw, bptest.sarlm, can.be.simmed,
##     cheb_setup, coef.gmsar, coef.sarlm, coef.spautolm, coef.stsls,
##     create_WX, deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar, griffith_sone,
##     gstsls, Hausman.test, HPDinterval.lagImpact, impacts, intImpacts,
##     Jacobian_W, jacobianSetup, l_max, lagmess, lagsarlm, lextrB,
##     lextrS, lextrW, lmSLX, logLik.sarlm, logLik.spautolm, LR.sarlm,
##     LR1.sarlm, LR1.spautolm, LU_prepermutate_setup, LU_setup,
##     Matrix_J_setup, Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm, print.summary.spautolm,
##     print.summary.stsls, residuals.gmsar, residuals.sarlm,
##     residuals.spautolm, residuals.stsls, sacsarlm, SE_classic_setup,
##     SE_interp_setup, SE_whichMin_setup, set.ClusterOption,
##     set.coresOption, set.mcOption, set.VerboseOption,
##     set.ZeroPolicyOption, similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag, spBreg_sac,
##     stsls, subgraph_eigenw, summary.gmsar, summary.sarlm,
##     summary.spautolm, summary.stsls, trW, vcov.sarlm, Wald1.sarlm
library(Matrix)
library(spgwr)
## NOTE: This package does not constitute approval of GWR
## as a method of spatial analysis; see example(gwr)

2.Import Data

Now Let’s import the data and have quick look of the data

hpa <- read_csv("house_price_analyse.csv")
## 
## ─ Column specification ────────────────────────────
## cols(
##   area_name = col_character(),
##   area_code = col_character(),
##   year_by_year_growth = col_double(),
##   claimant_count_rate = col_double(),
##   cases_per_1000000 = col_number(),
##   prop_of_residence = col_double(),
##   aver_num_of_open_space = col_double(),
##   still_eco_secure = col_double(),
##   sales_Oct20 = col_double()
## )
head(hpa)
## # A tibble: 6 x 9
##   area_name area_code year_by_year_gr… claimant_count_… cases_per_10000…
##   <chr>     <chr>                <dbl>            <dbl>            <dbl>
## 1 Barking … E09000002             1.31             10.1            7151.
## 2 Barnet    E09000003             2.01              7.3            4907.
## 3 Bexley    E09000004             3.4               5.7            6285.
## 4 Brent     E09000005             4.82             10.3            4964.
## 5 Bromley   E09000006             2.69              5.5            5075.
## 6 Camden    E09000007             6.52              5.8            3427.
## # … with 4 more variables: prop_of_residence <dbl>,
## #   aver_num_of_open_space <dbl>, still_eco_secure <dbl>, sales_Oct20 <dbl>
hpa_num <- dplyr :: select_if(hpa, is.numeric)

Load London borough shape file

Londonborough <- st_read("ESRI/London_Borough_Excluding_MHW.shp") 
## Reading layer `London_Borough_Excluding_MHW' from data source `/Users/liyiqing/Desktop/GIS final/gis/ESRI/London_Borough_Excluding_MHW.shp' using driver `ESRI Shapefile'
## Simple feature collection with 33 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## projected CRS:  OSGB 1936 / British National Grid
st_transform(Londonborough, 27700)
## Simple feature collection with 33 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 503568.2 ymin: 155850.8 xmax: 561957.5 ymax: 200933.9
## projected CRS:  OSGB 1936 / British National Grid
## First 10 features:
##                    NAME  GSS_CODE  HECTARES NONLD_AREA ONS_INNER SUB_2009
## 1  Kingston upon Thames E09000021  3726.117      0.000         F     <NA>
## 2               Croydon E09000008  8649.441      0.000         F     <NA>
## 3               Bromley E09000006 15013.487      0.000         F     <NA>
## 4              Hounslow E09000018  5658.541     60.755         F     <NA>
## 5                Ealing E09000009  5554.428      0.000         F     <NA>
## 6              Havering E09000016 11445.735    210.763         F     <NA>
## 7            Hillingdon E09000017 11570.063      0.000         F     <NA>
## 8                Harrow E09000015  5046.330      0.000         F     <NA>
## 9                 Brent E09000005  4323.270      0.000         F     <NA>
## 10               Barnet E09000003  8674.837      0.000         F     <NA>
##    SUB_2006                       geometry
## 1      <NA> MULTIPOLYGON (((516401.6 16...
## 2      <NA> MULTIPOLYGON (((535009.2 15...
## 3      <NA> MULTIPOLYGON (((540373.6 15...
## 4      <NA> MULTIPOLYGON (((521975.8 17...
## 5      <NA> MULTIPOLYGON (((510253.5 18...
## 6      <NA> MULTIPOLYGON (((549893.9 18...
## 7      <NA> MULTIPOLYGON (((510599.8 19...
## 8      <NA> MULTIPOLYGON (((510599.8 19...
## 9      <NA> MULTIPOLYGON (((525201 1825...
## 10     <NA> MULTIPOLYGON (((524579.9 19...
Londonborough <- Londonborough[order(Londonborough$GSS_CODE), ]
head(Londonborough)
## Simple feature collection with 6 features and 7 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 515484.9 ymin: 156480.9 xmax: 554089.2 ymax: 198355.2
## projected CRS:  OSGB 1936 / British National Grid
##                    NAME  GSS_CODE  HECTARES NONLD_AREA ONS_INNER SUB_2009
## 33       City of London E09000001   314.942     24.546         T     <NA>
## 32 Barking and Dagenham E09000002  3779.934    169.150         F     <NA>
## 10               Barnet E09000003  8674.837      0.000         F     <NA>
## 15               Bexley E09000004  6428.649    370.619         F     <NA>
## 9                 Brent E09000005  4323.270      0.000         F     <NA>
## 3               Bromley E09000006 15013.487      0.000         F     <NA>
##    SUB_2006                       geometry
## 33     <NA> MULTIPOLYGON (((531145.1 18...
## 32     <NA> MULTIPOLYGON (((543905.4 18...
## 10     <NA> MULTIPOLYGON (((524579.9 19...
## 15     <NA> MULTIPOLYGON (((547226.2 18...
## 9      <NA> MULTIPOLYGON (((525201 1825...
## 3      <NA> MULTIPOLYGON (((540373.6 15...

3.Preliminary Analysis

#summary the data
summary(hpa)
##   area_name          area_code         year_by_year_growth claimant_count_rate
##  Length:32          Length:32          Min.   :-0.420      Min.   : 4.800     
##  Class :character   Class :character   1st Qu.: 1.340      1st Qu.: 5.800     
##  Mode  :character   Mode  :character   Median : 3.990      Median : 7.950     
##                                        Mean   : 4.027      Mean   : 7.756     
##                                        3rd Qu.: 5.897      3rd Qu.: 9.200     
##                                        Max.   :10.340      Max.   :10.700     
##  cases_per_1000000 prop_of_residence aver_num_of_open_space still_eco_secure
##  Min.   :3427      Min.   :50.60     Min.   : 4.170         Min.   :26.00   
##  1st Qu.:4470      1st Qu.:62.55     1st Qu.: 5.032         1st Qu.:34.00   
##  Median :4963      Median :68.35     Median : 5.750         Median :44.00   
##  Mean   :5130      Mean   :66.88     Mean   : 6.478         Mean   :41.75   
##  3rd Qu.:5448      3rd Qu.:72.20     3rd Qu.: 6.955         3rd Qu.:47.25   
##  Max.   :7658      Max.   :75.10     Max.   :12.710         Max.   :56.00   
##   sales_Oct20   
##  Min.   : 77.0  
##  1st Qu.:119.8  
##  Median :140.0  
##  Mean   :159.0  
##  3rd Qu.:188.8  
##  Max.   :315.0

plot the histgram of the house price year by year growth rate distribution

library(ggplot2)
# set up the basic histogram
gghist1 <- ggplot(hpa, 
                 aes(x = year_by_year_growth)) + 
  geom_histogram(color="black", 
                 fill="white")+
  labs(title="House Price Year by Year Growth Rate Distribution", 
       x="house price year by year growth", 
       y="Frequency")
gghist1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

library(ggplot2)
# set up the basic histogram
gghist1 <- ggplot(hpa, 
                 aes(x = sqrt(year_by_year_growth))) + 
  geom_histogram(color="black", 
                 fill="white")+
  labs(title="House Price Year by Year Growth Rate Distribution", 
       x="house price year by year growth", 
       y="Frequency")
gghist1
## Warning in sqrt(year_by_year_growth): 产生了NaNs

## Warning in sqrt(year_by_year_growth): 产生了NaNs
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).

library(ggplot2)
# set up the basic histogram
gghist1 <- ggplot(hpa, 
                 aes(x = log(year_by_year_growth))) + 
  geom_histogram(color="black", 
                 fill="white")+
  labs(title="House Price Year by Year Growth Rate Distribution", 
       x="house price year by year growth", 
       y="Frequency")
gghist1
## Warning in log(year_by_year_growth): 产生了NaNs

## Warning in log(year_by_year_growth): 产生了NaNs
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).

map the house price year by year growth rate

#merge boundaries and data
hpa <- Londonborough%>%
  left_join(.,
            hpa, 
            by = c("GSS_CODE" = "area_code"))

hpa <- hpa[-1, ]
#let's map our dependent variable to see if the join has worked:
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(hpa, 
    fill = "year_by_year_growth", 
    borders = "black",  
    fill.palette = "Blues")

map the independent variables

plot(hpa[10:15])

4. Explore the Correlation Between Datas

plot the correlation between the features

#create a dadaframe of the numeric datas
corrplot(cor(hpa_num))

ggpairs

ggpairs(hpa_num,axisLabels="none")

5. Build Linear Regression Models and Model Selection

5.1 Train models with original data

Build the models contains all the independent variables

lm.max <- lm(year_by_year_growth ~ ., data = hpa_num)
summary(lm.max)
## 
## Call:
## lm(formula = year_by_year_growth ~ ., data = hpa_num)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.2551 -1.4765 -0.0392  1.6454  5.8746 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)             5.1150229  8.7760178   0.583    0.565
## claimant_count_rate    -0.0753606  0.6814198  -0.111    0.913
## cases_per_1000000      -0.0005628  0.0006417  -0.877    0.389
## prop_of_residence       0.0174010  0.1322549   0.132    0.896
## aver_num_of_open_space -0.0967192  0.3195321  -0.303    0.765
## still_eco_secure        0.0561146  0.1433867   0.391    0.699
## sales_Oct20            -0.0031184  0.0119674  -0.261    0.797
## 
## Residual standard error: 2.982 on 25 degrees of freedom
## Multiple R-squared:   0.11,  Adjusted R-squared:  -0.1036 
## F-statistic: 0.5149 on 6 and 25 DF,  p-value: 0.7914

5.2 Variable selection

Use Leaps to select the variables

library(leaps)

regsubsets.out <- regsubsets( year_by_year_growth ~ .,
                              data = hpa_num,
                              nbest = 1,
                              nvmax = NULL,
                              force.in = NULL, force.out = NULL,
                              method = 'exhaustive')
summary(regsubsets.out)
## Subset selection object
## Call: regsubsets.formula(year_by_year_growth ~ ., data = hpa_num, nbest = 1, 
##     nvmax = NULL, force.in = NULL, force.out = NULL, method = "exhaustive")
## 6 Variables  (and intercept)
##                        Forced in Forced out
## claimant_count_rate        FALSE      FALSE
## cases_per_1000000          FALSE      FALSE
## prop_of_residence          FALSE      FALSE
## aver_num_of_open_space     FALSE      FALSE
## still_eco_secure           FALSE      FALSE
## sales_Oct20                FALSE      FALSE
## 1 subsets of each size up to 6
## Selection Algorithm: exhaustive
##          claimant_count_rate cases_per_1000000 prop_of_residence
## 1  ( 1 ) " "                 "*"               " "              
## 2  ( 1 ) " "                 "*"               " "              
## 3  ( 1 ) " "                 "*"               " "              
## 4  ( 1 ) " "                 "*"               " "              
## 5  ( 1 ) " "                 "*"               "*"              
## 6  ( 1 ) "*"                 "*"               "*"              
##          aver_num_of_open_space still_eco_secure sales_Oct20
## 1  ( 1 ) " "                    " "              " "        
## 2  ( 1 ) " "                    "*"              " "        
## 3  ( 1 ) "*"                    "*"              " "        
## 4  ( 1 ) "*"                    "*"              "*"        
## 5  ( 1 ) "*"                    "*"              "*"        
## 6  ( 1 ) "*"                    "*"              "*"
as.data.frame(summary(regsubsets.out)$outmat)
##          claimant_count_rate cases_per_1000000 prop_of_residence
## 1  ( 1 )                                     *                  
## 2  ( 1 )                                     *                  
## 3  ( 1 )                                     *                  
## 4  ( 1 )                                     *                  
## 5  ( 1 )                                     *                 *
## 6  ( 1 )                   *                 *                 *
##          aver_num_of_open_space still_eco_secure sales_Oct20
## 1  ( 1 )                                                    
## 2  ( 1 )                                       *            
## 3  ( 1 )                      *                *            
## 4  ( 1 )                      *                *           *
## 5  ( 1 )                      *                *           *
## 6  ( 1 )                      *                *           *
plot(regsubsets.out, scale='adjr2', main='Adjusted Rsq')

According to the results show above, “cases_per_1000000”, “aver_num_of_open_space”, and “still_eco_secure” are the top three variables which are more significant to the dependent variable. So let’s build a model employed these tree variables and check the performance of the model.

lm_sel <- lm(year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space + still_eco_secure, data = hpa_num)
summary(lm_sel)
## 
## Call:
## lm(formula = year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space + 
##     still_eco_secure, data = hpa_num)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.037 -1.645 -0.154  1.476  6.018 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)             5.5231079  5.4814034   1.008    0.322
## cases_per_1000000      -0.0006206  0.0005813  -1.068    0.295
## aver_num_of_open_space -0.0776471  0.2387486  -0.325    0.747
## still_eco_secure        0.0524703  0.0725177   0.724    0.475
## 
## Residual standard error: 2.824 on 28 degrees of freedom
## Multiple R-squared:  0.1061, Adjusted R-squared:  0.01037 
## F-statistic: 1.108 on 3 and 28 DF,  p-value: 0.3623
hpa_num <- hpa_num[,c(-2, -4, -7)]
hpa <- hpa[,c(-10, -12, -15)]

5.3 Make sure the model meet the linear regression assumptions

As we can see that the new model dose not perform so well. So our final selection is the lm_new.max.

1.the residuals are normally distributed

#save the residuals into the datafram
lm_modeldata_new <- lm_sel %>%
  augment(., hpa_num)

#plot residuals
lm_modeldata_new%>%
dplyr::select(.resid)%>%
  pull()%>%
  qplot()+ 
  geom_histogram() 
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2.There is no multicolinearity among the independent variables

vif(lm_sel)
##      cases_per_1000000 aver_num_of_open_space       still_eco_secure 
##               1.491353               1.162065               1.459810

3.Homoscedasticity plot the residuals against the predicted values.

par(mfrow=c(2,2))    #plot to 2 by 2 array
plot(lm_sel)

  1. The residuals of the model are not correlated.(Check the standard autocorrelation)
DW <- durbinWatsonTest(lm_sel)
tidy(DW)
## # A tibble: 1 x 5
##   statistic p.value autocorrelation method             alternative
##       <dbl>   <dbl>           <dbl> <chr>              <chr>      
## 1      1.57   0.206           0.158 Durbin-Watson Test two.sided

6.Spatial Autocorrelation

create the new dataframe

#create a new data frame with the transformed data
hpa$lm_resids = lm_sel$residuals

map the residuals

tmap_mode("view")
## tmap mode set to interactive viewing
qtm(hpa, 
    fill = "lm_resids", 
    borders = "black",  
    fill.palette = "-RdBu")
## Variable(s) "lm_resids" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
#calculate the centroids of boroughs in London
coordsB <- hpa%>%
  st_centroid()%>%
  st_geometry()
## Warning in st_centroid.sf(.): st_centroid assumes attributes are constant over
## geometries of x
plot(coordsB)

#queen's case
LB_nb <- hpa %>%
  poly2nb(., queen=T)

#nearest neighbours
knn_B1 <-coordsB %>%
  knearneigh(., k=3)
knn_B2 <-coordsB %>%
  knearneigh(., k=5)

LB_knn3 <- knn_B1 %>%
  knn2nb()
LB_knn5 <- knn_B2 %>%
  knn2nb()
#plot them
plot(LB_nb, st_geometry(coordsB), col="red")

plot(LB_knn3, st_geometry(coordsB), col="blue")

plot(LB_knn5, st_geometry(coordsB), col="blue")

#create a spatial weights matrix object from these weights
LB_queens_weight <- LB_nb %>%
  nb2listw(., style="C")

LB.knn_3_weight <- LB_knn3 %>%
  nb2listw(., style="C")

LB.knn_5_weight <- LB_knn5 %>%
  nb2listw(., style="C")
Queen <- hpa %>%
  st_drop_geometry()%>%
  dplyr::select(lm_resids)%>%
  pull()%>%
  moran.test(., LB_queens_weight)%>%
  tidy()
Queen
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1    -0.139   -0.0323    0.0135    -0.919   0.821 Moran I test unde… greater
Nearest_neighbour1 <- hpa %>%
  st_drop_geometry()%>%
  dplyr::select(lm_resids)%>%
  pull()%>%
  moran.test(., LB.knn_3_weight)%>%
  tidy()
Nearest_neighbour1
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1   -0.0863   -0.0323    0.0163    -0.423   0.664 Moran I test unde… greater
Nearest_neighbour2 <- hpa %>%
  st_drop_geometry()%>%
  dplyr::select(lm_resids)%>%
  pull()%>%
  moran.test(., LB.knn_5_weight)%>%
  tidy()
Nearest_neighbour2
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1   -0.0579   -0.0323   0.00885    -0.272   0.607 Moran I test unde… greater

7.Spatial Regression Models

7.1 SLM

slm_queen <- lagsarlm(year_by_year_growth ~ cases_per_1000000 +
                        aver_num_of_open_space + still_eco_secure,
               data = hpa, 
               nb2listw(LB_nb, style="C"), 
               method = "eigen")

#what do the outputs show?
summary(slm_queen)
## 
## Call:lagsarlm(formula = year_by_year_growth ~ cases_per_1000000 + 
##     aver_num_of_open_space + still_eco_secure, data = hpa, listw = nb2listw(LB_nb, 
##     style = "C"), method = "eigen")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4.96269 -1.64203 -0.14911  1.53668  6.15674 
## 
## Type: lag 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value Pr(>|z|)
## (Intercept)             6.17264229  5.77035958  1.0697   0.2847
## cases_per_1000000      -0.00067612  0.00058258 -1.1606   0.2458
## aver_num_of_open_space -0.08974996  0.22447101 -0.3998   0.6893
## still_eco_secure        0.05019442  0.06912625  0.7261   0.4678
## 
## Rho: -0.046, LR test value: 0.04416, p-value: 0.83356
## Asymptotic standard error: 0.21148
##     z-value: -0.21751, p-value: 0.82781
## Wald statistic: 0.047311, p-value: 0.82781
## 
## Log likelihood: -76.46988 for lag model
## ML residual variance (sigma squared): 6.9654, (sigma: 2.6392)
## Number of observations: 32 
## Number of parameters estimated: 6 
## AIC: 164.94, (AIC for lm: 162.98)
## LM test for residual autocorrelation
## test value: 3.0361, p-value: 0.081431
hpa <- hpa %>%
  mutate(slm_queen_resids = residuals(slm_queen))

QueenMoran <- hpa %>%
  st_drop_geometry()%>%
  dplyr::select(slm_queen_resids)%>%
  pull()%>%
  moran.test(., LB_queens_weight)%>%
  tidy()

QueenMoran
## # A tibble: 1 x 7
##   estimate1 estimate2 estimate3 statistic p.value method             alternative
##       <dbl>     <dbl>     <dbl>     <dbl>   <dbl> <chr>              <chr>      
## 1    -0.120   -0.0323    0.0135    -0.752   0.774 Moran I test unde… greater

7.2 SEM

sem_queen <- errorsarlm(year_by_year_growth ~ cases_per_1000000 +
                        aver_num_of_open_space + still_eco_secure,
               data = hpa, 
               nb2listw(LB_nb, style="C"), 
               method = "eigen")

summary(sem_queen)
## 
## Call:errorsarlm(formula = year_by_year_growth ~ cases_per_1000000 + 
##     aver_num_of_open_space + still_eco_secure, data = hpa, listw = nb2listw(LB_nb, 
##     style = "C"), method = "eigen")
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -4.62616 -1.61717  0.12334  1.49409  6.16708 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                           Estimate  Std. Error z value Pr(>|z|)
## (Intercept)             6.41138255  4.18272878  1.5328  0.12532
## cases_per_1000000      -0.00073080  0.00043727 -1.6713  0.09467
## aver_num_of_open_space -0.16321491  0.19572110 -0.8339  0.40433
## still_eco_secure        0.05941741  0.05443118  1.0916  0.27501
## 
## Lambda: -0.34605, LR test value: 1.5778, p-value: 0.20908
## Asymptotic standard error: 0.26434
##     z-value: -1.3091, p-value: 0.1905
## Wald statistic: 1.7138, p-value: 0.1905
## 
## Log likelihood: -75.70308 for error model
## ML residual variance (sigma squared): 6.4553, (sigma: 2.5407)
## Number of observations: 32 
## Number of parameters estimated: 6 
## AIC: 163.41, (AIC for lm: 162.98)

7.3 SDM

queen.listw <- LB_queens_weight
listw1 <-  queen.listw

7.4 Local Moran’s I

localmi <- localmoran(hpa$year_by_year_growth, listw=listw1, alternative = "greater")
localmi
##              Ii        E.Ii    Var.Ii        Z.Ii   Pr(z > 0)
## 2   0.474777015 -0.02457757 0.1729812  1.20063103 0.114947179
## 3   0.047633769 -0.04096262 0.2687297  0.17090635 0.432148703
## 4   0.083736320 -0.01638505 0.1192353  0.28995085 0.385926919
## 5  -0.019092158 -0.05734767 0.3488203  0.06477287 0.474177422
## 6   0.233281163 -0.04915515 0.3107323  0.50667253 0.306192303
## 7  -0.109140001 -0.04096262 0.2687297 -0.13151717 0.552316898
## 8   0.286296970 -0.03277010 0.2228127  0.67594601 0.249537461
## 9  -1.050233956 -0.04096262 0.2687297 -1.94692893 0.974228372
## 10  0.008964112 -0.02457757 0.1729812  0.08064648 0.467861552
## 11  0.127959929 -0.02457757 0.1729812  0.36675593 0.356900540
## 12 -0.293041759 -0.04096262 0.2687297 -0.48627178 0.686612757
## 13 -1.472841412 -0.03277010 0.2228127 -3.05080206 0.998858845
## 14  0.004270353 -0.04915515 0.3107323  0.09584190 0.461823069
## 15 -0.301245603 -0.03277010 0.2228127 -0.56876741 0.715243001
## 16  0.236960576 -0.01638505 0.1192353  0.73368733 0.231569661
## 17 -0.003915200 -0.02457757 0.1729812  0.04967990 0.480188737
## 18 -0.054424607 -0.03277010 0.2228127 -0.04587524 0.518295157
## 19 -0.091740296 -0.02457757 0.1729812 -0.16148375 0.564143794
## 20 -1.277765927 -0.02457757 0.1729812 -3.01312307 0.998707130
## 21  1.096541778 -0.03277010 0.2228127  2.39245582 0.008368021
## 22 -0.008825127 -0.04096262 0.2687297  0.06199465 0.475283548
## 23 -0.146758059 -0.02457757 0.1729812 -0.29376657 0.615531858
## 24  1.393099625 -0.04096262 0.2687297  2.76636935 0.002834215
## 25  0.388049675 -0.04096262 0.2687297  0.82758365 0.203953166
## 26 -0.044127480 -0.03277010 0.2228127 -0.02406070 0.509597905
## 27  0.523661556 -0.02457757 0.1729812  1.31816734 0.093723817
## 28  0.116540759 -0.02457757 0.1729812  0.33930007 0.367191844
## 29  0.036214389 -0.02457757 0.1729812  0.14616611 0.441895131
## 30  0.003468807 -0.01638505 0.1192353  0.05749664 0.477074791
## 31 -0.373094377 -0.04096262 0.2687297 -0.64069681 0.739140157
## 32  0.909695544 -0.03277010 0.2228127  1.99662065 0.022933204
## 33 -1.051223044 -0.02457757 0.1729812 -2.46843113 0.993214662
## attr(,"call")
## localmoran(x = hpa$year_by_year_growth, listw = listw1, alternative = "greater")
## attr(,"class")
## [1] "localmoran" "matrix"     "array"
hpa$hpmi <- localmi[,1]
tmap_mode("view")
## tmap mode set to interactive viewing
qtm(hpa, 
    fill = "hpmi", 
    borders = "black",  
    fill.palette = "Blues")

#7.5 GWR

st_crs(hpa) = 27700
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
hpa <- hpa %>%
  as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum OSGB 1936 in CRS definition
st_crs(coordsB) = 2770
## Warning: st_crs<- : replacing crs does not reproject data; use st_transform for
## that
coordsBSP <- coordsB %>%
  as(., "Spatial")
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj =
## prefer_proj): Discarded datum Unknown based on GRS80 ellipsoid in CRS definition
## Warning in showSRID(SRS_string, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum NAD83 (High Accuracy Reference Network) in CRS
## definition
GWRbandwidth <- gwr.sel(year_by_year_growth ~ cases_per_1000000 +
                        aver_num_of_open_space + still_eco_secure,
                        data = hpa,
                        coords=coordsBSP,
                        adapt=T)
## Warning in gwr.sel(year_by_year_growth ~ cases_per_1000000 +
## aver_num_of_open_space + : data is Spatial* object, ignoring coords argument
## Adaptive q: 0.381966 CV score: 299.4925 
## Adaptive q: 0.618034 CV score: 291.8842 
## Adaptive q: 0.763932 CV score: 289.2583 
## Adaptive q: 0.9325207 CV score: 287.3688 
## Adaptive q: 0.8681255 CV score: 287.9386 
## Adaptive q: 0.9582955 CV score: 287.298 
## Adaptive q: 0.9657041 CV score: 287.2862 
## Adaptive q: 0.978804 CV score: 287.1715 
## Adaptive q: 0.9869001 CV score: 287.1073 
## Adaptive q: 0.9919038 CV score: 287.0754 
## Adaptive q: 0.9949963 CV score: 287.058 
## Adaptive q: 0.9969075 CV score: 287.0481 
## Adaptive q: 0.9980888 CV score: 287.0422 
## Adaptive q: 0.9988188 CV score: 287.0387 
## Adaptive q: 0.99927 CV score: 287.0365 
## Adaptive q: 0.9995488 CV score: 287.0352 
## Adaptive q: 0.9997212 CV score: 287.0344 
## Adaptive q: 0.9998277 CV score: 287.0339 
## Adaptive q: 0.9998935 CV score: 287.0336 
## Adaptive q: 0.9999342 CV score: 287.0334 
## Adaptive q: 0.9999342 CV score: 287.0334
## Warning in gwr.sel(year_by_year_growth ~ cases_per_1000000 +
## aver_num_of_open_space + : Bandwidth converged to upper bound:1

build GWR model

gwr.model = gwr(year_by_year_growth ~ cases_per_1000000 +
                        aver_num_of_open_space + still_eco_secure,
                        data = hpa,
                        coords=coordsBSP,
                adapt=GWRbandwidth, 
                hatmatrix=TRUE, 
                se.fit=TRUE)
## Warning in gwr(year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space
## + : data is Spatial* object, ignoring coords argument
## Warning in proj4string(data): CRS object has comment, which is lost in output
## Warning in showSRID(uprojargs, format = "PROJ", multiline = "NO", prefer_proj
## = prefer_proj): Discarded datum Unknown based on Airy 1830 ellipsoid in CRS
## definition
gwr.model
## Call:
## gwr(formula = year_by_year_growth ~ cases_per_1000000 + aver_num_of_open_space + 
##     still_eco_secure, data = hpa, coords = coordsBSP, adapt = GWRbandwidth, 
##     hatmatrix = TRUE, se.fit = TRUE)
## Kernel function: gwr.Gauss 
## Adaptive quantile: 0.9999342 (about 31 of 32 data points)
## Summary of GWR coefficient estimates at data points:
##                               Min.     1st Qu.      Median     3rd Qu.
## X.Intercept.            4.95160842  5.04396480  5.16991195  5.34675252
## cases_per_1000000      -0.00061984 -0.00059530 -0.00057082 -0.00055833
## aver_num_of_open_space -0.10744676 -0.09296139 -0.08211238 -0.07348976
## still_eco_secure        0.60822096  0.63670620  0.66842729  0.68615488
##                               Max.  Global
## X.Intercept.            5.62925403  5.5231
## cases_per_1000000      -0.00053395 -0.0006
## aver_num_of_open_space -0.05697641 -0.0776
## still_eco_secure        0.69308843  0.0525
## Number of data points: 32 
## Effective number of parameters (residual: 2traceS - traceS'S): 4.694045 
## Effective degrees of freedom (residual: 2traceS - traceS'S): 27.30595 
## Sigma (residual: 2traceS - traceS'S): 2.834778 
## Effective number of parameters (model: traceS): 4.363394 
## Effective degrees of freedom (model: traceS): 27.63661 
## Sigma (model: traceS): 2.817769 
## Sigma (ML): 2.618621 
## AICc (GWR p. 61, eq 2.33; p. 96, eq. 4.21): 165.8109 
## AIC (GWR p. 96, eq. 4.22): 156.7849 
## Residual sum of squares: 219.4297 
## Quasi-global R2: 0.121715

analyse the results of GWR

results <- as.data.frame(gwr.model$SDF)
names(results)
##  [1] "sum.w"                         "X.Intercept."                 
##  [3] "cases_per_1000000"             "aver_num_of_open_space"       
##  [5] "still_eco_secure"              "X.Intercept._se"              
##  [7] "cases_per_1000000_se"          "aver_num_of_open_space_se"    
##  [9] "still_eco_secure_se"           "gwr.e"                        
## [11] "pred"                          "pred.se"                      
## [13] "localR2"                       "still_eco_secure_EDF"         
## [15] "X.Intercept._se_EDF"           "cases_per_1000000_se_EDF"     
## [17] "aver_num_of_open_space_se_EDF" "still_eco_secure_se_EDF"      
## [19] "pred.se.1"
hpa_gwr <- hpa 
hpa_gwr$coefcasesrate <- results$cases_per_1000000
hpa_gwr$coefaveros <- results$aver_num_of_open_space
hpa_gwr$coefeco <- results$still_eco_secure
hpa_gwr$localr <- results$localR2

run significance test

#run the significance test
sigTest_cases = abs(gwr.model$SDF$"cases_per_1000000")-2 * gwr.model$SDF$"cases_per_1000000_se"

#store significance results
hpa_gwr$casesig = sigTest_cases
#run the significance test
sigTest_aver = abs(gwr.model$SDF$"aver_num_of_open_space")-2 * gwr.model$SDF$"aver_num_of_open_space_se"


#store significance results
hpa_gwr$aversig = sigTest_aver
#run the significance test
sigTest_eco = abs(gwr.model$SDF$"still_eco_secure")-2 * gwr.model$SDF$"still_eco_secure_se"


#store significance results
hpa_gwr$ecosig = sigTest_eco

map the sdandard errors

tm_shape(hpa_gwr) +
  tm_polygons(col = "casesig", 
              palette = "Blues",
              alpha = 0.6)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
tm_shape(hpa_gwr) +
  tm_polygons(col = "aversig", 
              palette = "Blues",
              alpha = 0.6)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output
tm_shape(hpa_gwr) +
  tm_polygons(col = "ecosig", 
              palette = "Reds", 
              alpha = 0.6)
## Warning in sp::proj4string(obj): CRS object has comment, which is lost in output